pacman::p_load(sf, tmap, sfdep, tidyverse)Take Home Exercise 1
Getting Started
Geospatial Data
bus stop geospatial data in singapore
busStops <- st_read(dsn = "data/geospatial",
layer = "BusStop") %>%
st_transform(crs = 3414)Reading layer `BusStop' from data source
`/Users/michaelberlian/Desktop/SMU Nov/ISSS624 GeoSpatial/ISSS624/Take-Home_Ex/Take-Home_Ex1/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
Plotting Bus Stop points
tmap_mode("view")tmap mode set to interactive viewing
tm_shape(busStops) +
tm_dots()Hexagonal Grid Creation
creating our own hexagonal gridd
grid = st_make_grid(busStops, c(500), what = "polygons", square = FALSE)
# To sf and add grid ID
grid_sf = st_sf(grid) %>%
# add grid ID
mutate(grid_id = 1:length(lengths(grid)))grid_sf$n_colli = lengths(st_intersects(grid_sf, busStops))
# remove grid without value of 0 (i.e. no points in side that grid)
grid_count = filter(grid_sf,n_colli > 0 )tmap_mode("view")tmap mode set to interactive viewing
tm_shape(grid_count) +
tm_fill(
col = "n_colli",
palette = "Blues",
style = "cont",
title = "Number of collisions",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.vars = c(
"Number of collisions: " = "n_colli"
),
popup.format = list(
n_colli = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.7)removing bus stop grids outside of singapore
grid_count_rm <- grid_count %>%
filter(!grid_id == 1767,
!grid_id == 2073,
!grid_id == 2135,
!grid_id == 2104)Aspatial Data
importing the bus trip data of August 2023.
un-comment the one of other 2 lines and comment the first line using (cmd/ctrl+shift+c) to switch the data set into the other months.
busTrips <- read_csv("data/aspatial/origin_destination_bus_202308.csv")Rows: 5709512 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): YEAR_MONTH, DAY_TYPE, PT_TYPE, ORIGIN_PT_CODE, DESTINATION_PT_CODE
dbl (2): TIME_PER_HOUR, TOTAL_TRIPS
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202309.csv")
# busTrips <- read_csv("data/aspatial/origin_destination_bus_202310.csv")
busTrips$ORIGIN_PT_CODE <- as.factor(busTrips$ORIGIN_PT_CODE)
busTrips$DESTINATION_PT_CODE <- as.factor(busTrips$DESTINATION_PT_CODE)Data wrangling
Aspatial Data Wrangling
calculating bus trip in weekday and morning peak
busTripsDayMorning <- busTrips %>%
filter(DAY_TYPE == "WEEKDAY",
TIME_PER_HOUR >= 6,
TIME_PER_HOUR <= 9) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(WeekdayMorningTrips = sum(TOTAL_TRIPS))calculating bus trip in weekday and afternoon peak
busTripsDayAfternoon <- busTrips %>%
filter(DAY_TYPE == "WEEKDAY",
TIME_PER_HOUR >= 17,
TIME_PER_HOUR <= 20) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(WeekdayAfternoonTrips = sum(TOTAL_TRIPS))calculating bus trip in weekend and morning peak
busTripsEndMorning <- busTrips %>%
filter(DAY_TYPE == "WEEKENDS/HOLIDAY",
TIME_PER_HOUR >= 11,
TIME_PER_HOUR <= 14) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(WeekendMorningTrips = sum(TOTAL_TRIPS))calculating bus trip in weekend and evening peak
busTripsEndEvening <- busTrips %>%
filter(DAY_TYPE == "WEEKENDS/HOLIDAY",
TIME_PER_HOUR >= 16,
TIME_PER_HOUR <= 19) %>%
group_by(ORIGIN_PT_CODE) %>%
summarise(WeekendEveningTrips = sum(TOTAL_TRIPS))Combining the Peak Trips
BusTrips_comb <- busTripsDayMorning %>%
left_join(busTripsDayAfternoon) %>%
left_join(busTripsEndMorning) %>%
left_join(busTripsEndEvening)Joining with `by = join_by(ORIGIN_PT_CODE)`
Joining with `by = join_by(ORIGIN_PT_CODE)`
Joining with `by = join_by(ORIGIN_PT_CODE)`
Geospatial Data Wrangling
connecting the bus stop and the hexagonal grids
grid_bus <- st_join(grid_count_rm,busStops,join = st_contains) %>%
unnest() %>%
select(grid_id,BUS_STOP_N)Warning: `cols` is now required when using `unnest()`.
ℹ Please use `cols = c(grid)`.
grid_bus$BUS_STOP_N <- as.factor(grid_bus$BUS_STOP_N)Joining aspatial and geospatial
combine the number of trip data and the geospatial data.
transforming the data based on the hexagonal grid id
Trips <- left_join(BusTrips_comb,grid_bus,
by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
group_by(grid_id)%>%
summarise(WeekdayMorningTrips = sum(WeekdayMorningTrips),
WeekdayAfternoonTrips = sum(WeekdayAfternoonTrips),
WeekendMorningTrips = sum(WeekendMorningTrips),
WeekendEveningTrips = sum(WeekendEveningTrips))Trips <- left_join(grid_count_rm,Trips) %>%
mutate (Total_Trips = WeekdayMorningTrips+WeekdayAfternoonTrips+WeekendMorningTrips +WeekendEveningTrips) %>%
rename (n_bus = n_colli)Joining with `by = join_by(grid_id)`
Trip per bus stop, to see if the trip amount is caused by number of bus station or is it really packed
TripsPerBusStop <- Trips %>%
mutate (WeekdayMorningTrips = WeekdayMorningTrips/n_bus,
WeekdayAfternoonTrips = WeekdayAfternoonTrips/n_bus,
WeekendMorningTrips = WeekendMorningTrips/n_bus,
WeekendEveningTrips = WeekendEveningTrips/n_bus)Using log to see if there is skewness
TripsLog <- Trips %>%
mutate (WeekdayMorningTrips = log(WeekdayMorningTrips),
WeekdayAfternoonTrips = log(WeekdayAfternoonTrips),
WeekendMorningTrips = log(WeekendMorningTrips),
WeekendEveningTrips = log(WeekendEveningTrips))Visualising
initial look of the overall number of trips in the peak times
tmap_mode("view")tmap mode set to interactive viewing
tm_shape(Trips) +
tm_fill(
col = "Total_Trips",
palette = "Blues",
style = "quantile",
title = "Total Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
popup.format = list(
Total_Trips = list(format = "f", digits = 0)
)
) +
tm_borders(col = "grey40", lwd = 0.7)Warning: popup.vars not specified whereas popup.format is a list
The plot above are from total bus trip per origin bus station.
From the plot of total trips above, can be seen that bus trips are spreaded around across the country. However, there are several clusters can be seen.
Total Trips
plotting the trips of each peak hours
weekday_morning <- tm_shape(Trips) +
tm_fill(
col = "WeekdayMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekday_afternoon <- tm_shape(Trips) +
tm_fill(
col = "WeekdayAfternoonTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Afternoon Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_morning <- tm_shape(Trips) +
tm_fill(
col = "WeekendMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_evening <- tm_shape(Trips) +
tm_fill(
col = "WeekendEveningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Evening Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)tmap_mode("plot")tmap mode set to plotting
tmap_arrange(weekday_morning, weekday_afternoon, weekend_morning, weekend_evening,
ncol=2, nrow=2)
The plot above are segragated trips divided into 4 categories: Weekday morning, Weekday afternoon, Weekend morning, and weekend evening. There are only minimal differences between them. However a noticable different is on the east side. The east side quantile is lower on the evening or afternoon than in the morning both on weekend and weekday. Also, in the central can be seen that they have a higher quantile during the afternoon and evening.
Total Trips per Bus Stop
plotting the number of trips per peak hours per number of bus station
weekday_morning_per_stop <- tm_shape(TripsPerBusStop) +
tm_fill(
col = "WeekdayMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekday_afternoon_per_stop <- tm_shape(TripsPerBusStop) +
tm_fill(
col = "WeekdayAfternoonTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Afternoon Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_morning_per_stop <- tm_shape(TripsPerBusStop) +
tm_fill(
col = "WeekendMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_evening_per_stop <- tm_shape(TripsPerBusStop) +
tm_fill(
col = "WeekendEveningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Evening Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)tmap_mode("plot")tmap mode set to plotting
tmap_arrange(weekday_morning_per_stop, weekday_afternoon_per_stop,
weekend_morning_per_stop, weekend_evening_per_stop,
ncol=2, nrow=2) 
the plot above shows the differences of total number of trips based of the time of day and the day. The number of total trip in a hexagon are divided per each number of bus station in the hexagon. However, the result are not change by a lot as well.
Log value of trips
plotting the log number of total trips for each peak hour times to see the skewness.
weekday_morning_log <- tm_shape(TripsLog) +
tm_fill(
col = "WeekdayMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekday_afternoon_log <- tm_shape(TripsLog) +
tm_fill(
col = "WeekdayAfternoonTrips",
palette = "Blues",
style = "quantile",
title = "Weekday Afternoon Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6,
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_morning_log <- tm_shape(TripsLog) +
tm_fill(
col = "WeekendMorningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Morning Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)weekend_evening_log <- tm_shape(TripsLog) +
tm_fill(
col = "WeekendEveningTrips",
palette = "Blues",
style = "quantile",
title = "Weekend Evening Trips",
id = "grid_id",
showNA = FALSE,
alpha = 0.6
) +
tm_borders(col = "grey40", lwd = 0.7) +
tm_legend(scale = 0.5)tmap_mode("plot")tmap mode set to plotting
tmap_arrange(weekday_morning_log, weekday_afternoon_log,
weekend_morning_log, weekend_evening_log,
ncol=2, nrow=2)
The above plot shows the value of total number of trip per hexagon after applying log to the number. There are also minimal differences between them and previous plots.
Local Indicators of Spatial Association (LISA) Analysis
perfoming LISA analysis to see the correlation between hexagonal grids
Spatial Weight
using the inverse distance based weight to see the correlation between the hexagons.
wm_idw <- Trips %>%
mutate(nb = st_dist_band(grid),
wts = st_inverse_distance(nb, grid,
scale = 1,
alpha = 1),
.before = 1) %>%
mutate(grid_id = 1:length(Trips$grid_id))! Polygon provided. Using point on surface.
! Polygon provided. Using point on surface.
Local Moran
calculating the local moran of every peak hours trips
Weekday Morning
lisa_DM <- wm_idw %>%
mutate(local_moran = local_moran(
WeekdayMorningTrips, nb, wts, nsim = 99, na.action=na.pass),
.before = 1) %>%
unnest(local_moran)Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekdayMorningTrips, nb, wts, nsim =
99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran_DM <- tm_shape(lisa_DM) +
tm_fill("ii") +
tm_borders(alpha = 0.5) +
tm_view(set.zoom.limits = c(6,8)) +
tm_layout(main.title = "local Moran's I of Trip",
main.title.size = 0.8)p_val_moran_DM <- tm_shape(lisa_DM) +
tm_fill("p_ii_sim") +
tm_borders(alpha = 0.5) +
tm_layout(main.title = "p-value of local Moran's I",
main.title.size = 0.8)tmap_arrange(ii_val_moran_DM, p_val_moran_DM, ncol = 2)Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig_DM <- lisa_DM %>%
filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)
weekday_morning_moran <- tm_shape(lisa_DM) +
tm_polygons() +
tm_borders(alpha = 0.5) +
tm_shape(lisa_sig_DM) +
tm_fill("mean", palette = pal , title = "Weekday Morning Trips") +
tm_borders(alpha = 0.4) +
tm_legend(scale = 0.5)Weekday Afternoon
lisa_DA <- wm_idw %>%
mutate(local_moran = local_moran(
WeekdayAfternoonTrips, nb, wts, nsim = 99, na.action=na.pass),
.before = 1) %>%
unnest(local_moran)Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekdayAfternoonTrips, nb, wts, nsim
= 99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran_DA <- tm_shape(lisa_DA) +
tm_fill("ii") +
tm_borders(alpha = 0.5) +
tm_view(set.zoom.limits = c(6,8)) +
tm_layout(main.title = "local Moran's I of Trip",
main.title.size = 0.8)p_val_moran_DA <- tm_shape(lisa_DA) +
tm_fill("p_ii_sim") +
tm_borders(alpha = 0.5) +
tm_layout(main.title = "p-value of local Moran's I",
main.title.size = 0.8)tmap_arrange(ii_val_moran_DA, p_val_moran_DA, ncol = 2)Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig_DA <- lisa_DA %>%
filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)
weekday_afternoon_moran <- tm_shape(lisa_DA) +
tm_polygons() +
tm_borders(alpha = 0.5) +
tm_shape(lisa_sig_DA) +
tm_fill("mean", palette = pal , title = "Weekday Afternoon Trips") +
tm_borders(alpha = 0.4) +
tm_legend(scale = 0.5)Weekend Morning
lisa_EM <- wm_idw %>%
mutate(local_moran = local_moran(
WeekendMorningTrips, nb, wts, nsim = 99, na.action=na.pass),
.before = 1) %>%
unnest(local_moran)Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekendMorningTrips, nb, wts, nsim =
99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran_EM <- tm_shape(lisa_EM) +
tm_fill("ii") +
tm_borders(alpha = 0.5) +
tm_view(set.zoom.limits = c(6,8)) +
tm_layout(main.title = "local Moran's I of Trip",
main.title.size = 0.8)p_val_moran_EM <- tm_shape(lisa_EM) +
tm_fill("p_ii_sim") +
tm_borders(alpha = 0.5) +
tm_layout(main.title = "p-value of local Moran's I",
main.title.size = 0.8)tmap_arrange(ii_val_moran_EM, p_val_moran_EM, ncol = 2)Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig_EM <- lisa_EM %>%
filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)
weekend_morning_moran <- tm_shape(lisa_EM) +
tm_polygons() +
tm_borders(alpha = 0.5) +
tm_shape(lisa_sig_EM) +
tm_fill("mean", palette = pal , title = "Weekend Morning Trips" ) +
tm_borders(alpha = 0.4) +
tm_legend(scale = 0.5)Weekend Evening
lisa_EE <- wm_idw %>%
mutate(local_moran = local_moran(
WeekendEveningTrips, nb, wts, nsim = 99, na.action=na.pass),
.before = 1) %>%
unnest(local_moran)Warning: There were 2 warnings in `stopifnot()`.
The first warning was:
ℹ In argument: `local_moran = local_moran(WeekendEveningTrips, nb, wts, nsim =
99, na.action = na.pass)`.
Caused by warning in `lag.listw()`:
! NAs in lagged values
ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
ii_val_moran_EE <- tm_shape(lisa_EE) +
tm_fill("ii") +
tm_borders(alpha = 0.5) +
tm_view(set.zoom.limits = c(6,8)) +
tm_layout(main.title = "local Moran's I of Trip",
main.title.size = 0.8)p_val_moran_EE <- tm_shape(lisa_EE) +
tm_fill("p_ii_sim") +
tm_borders(alpha = 0.5) +
tm_layout(main.title = "p-value of local Moran's I",
main.title.size = 0.8)tmap_arrange(ii_val_moran_EE, p_val_moran_EE, ncol = 2)Variable(s) "ii" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

lisa_sig_EE <- lisa_EE %>%
filter(p_ii_sim < 0.05)
pal <- RColorBrewer::brewer.pal("Set1", n = 5)
weekend_evening_moran <- tm_shape(lisa_EE) +
tm_polygons() +
tm_borders(alpha = 0.5) +
tm_shape(lisa_sig_EE) +
tm_fill("mean", palette = pal , title = "Weekend Evening Trips") +
tm_borders(alpha = 0.4) +
tm_legend(scale = 0.5)LISA MAP
plotting the LISA map
tmap_arrange(weekday_morning_moran, weekday_afternoon_moran,
weekend_morning_moran, weekend_evening_moran,
ncol = 2, nrow = 2)Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).
Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).
Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).
Warning: One tm layer group has duplicated layer types, which are omitted. To
draw multiple layers of the same type, use multiple layer groups (i.e. specify
tm_shape prior to each of them).

Conclusion
The graph above shows the local moran association between each hexagonal grids using the inverse distance weight.
The difference between week and and weekday is that on the weekend there are some similar clustering of low-low and high-high spread across the country, however on weekday most of it are only either high-low and low-high mean dispersion.
Can also be seen on the top 2 graph for weekday, on morning the dispersion happens in outer Singapore or suburban area, on the other hand on the afternoon the dispersion happen more toward the center. This is following the fact that early in the morning people go to work or school and go home in the after noon.
On the weekend graphs can be seen that most of the dispersion and clustering happened in the central area in the morning and a bit spread out around during the night.